di "***********************************"
di "************APPEND 2017 2020*******"
di "************LONG FORM *************"
di "************N=4187***********************"
* Build the long-form file: stack the 2017 extract and the 2020 extract so
* that each person (pid) contributes one row per survey year.
use  "${SOEP}OUTDATA/inno2017_pgen_P_H",clear
append using "${SOEP}OUTDATA/inno2020_PB_HH.dta",nolabel

de,s
tab year
* Rows with year 2018 are not part of the two-wave comparison.
drop if year==2018
* Number of rows each person contributes after the drop.
bys pid (SC): gen num_appear=_N

* Non-missing values sort first, so [1] spreads a person's observed
* yrs_edu_2019 to all of their rows; the 2020 row then takes that value.
bys pid (yrs_edu_2019): replace yrs_edu_2019=yrs_edu_2019[1]
bys pid: replace yrs_edu=yrs_edu_2019 if year==2020

duplicates report pid

* Wave-specific SC, copied to every row of the person.
* The value is selected by survey year rather than by row position: with
* SC[1]/SC[2] a person observed only in 2020 would have the 2020 score
* stored in SC2017. Persons missing a wave now get a missing value instead.
bys pid: egen SC2017=max(cond(year==2017,SC,.))
bys pid: egen SC2020=max(cond(year==2020,SC,.))
* Restore the pid/year ordering that the positional version left behind.
sort pid year


di "******************************************"
di "********Merge with time invariant X******"
di "********YOB ****************************"
* Attach the person tracking file (one row per pid) and keep matched rows
* only, which removes using-only persons that are not in the master data.
merge m:1 pid using "${SOEP}RAW/soep-is.2019_stata_en/ppfad.dta", generate(_merge_ppfad)
keep if _merge_ppfad == 3

* male is 1 when sex equals 1 and 0 for every other value, including
* missing. NOTE(review): confirm sex has no missing codes in this sample.
gen male = (sex == 1)

* Age in the survey year.
gen Age = syear - gebjahr
tab Age

* Five-year age bands: 0 below 25, 1 for 25 to under 30, ... 10 for 70 to
* under 75, and 11 for 75 through 100. Ages above 100 and missing ages
* stay missing.
gen agegroup = 0 if Age < 25
forvalues g = 1/10 {
	local lo = 20 + 5 * `g'
	replace agegroup = `g' if Age >= `lo' & Age < `lo' + 5
}
replace agegroup = 11 if inrange(Age, 75, 100)

* Variable and value labels for the bands.
label var agegroup "Age groups"
capture label drop agegroup
label define agegroup ///
	0  "Age 17-24" ///
	1  "Age 25-29" ///
	2  "Age 30-34" ///
	3  "Age 35-39" ///
	4  "Age 40-44" ///
	5  "Age 45-49" ///
	6  "Age 50-54" ///
	7  "Age 55-59" ///
	8  "Age 60-64" ///
	9  "Age 65-69" ///
	10 "Age 70-74" ///
	11 "Age 75 above"
label values agegroup agegroup

tab edu_2019 year
* Fill missing edu from the person's edu_2019 (non-missing sorts first),
* then blank out non-positive codes.
bys pid (edu_2019): replace edu = edu_2019[1] if edu == .
replace edu = . if edu <= 0
di "************************"
di "***life events shock****"
di "************************"

* Employment status: perw codes 1-4 are copied as they are, codes 5-9 are
* collapsed into category 5; other perw values leave emp untouched.
replace emp = min(perw, 5) if inrange(perw, 1, 4) | inrange(perw, 5, 9)

* Employment shock, constant within person: first-wave emp is 1 and the
* second-wave emp is 2-5 (author's note: full time to others), or
* first-wave emp is 2 and second-wave emp is 4-5 (author's note: part time
* to marginal or not employed).
bys pid (year): gen empshock = (emp[1] == 1 & inrange(emp[2], 2, 5)) | (emp[1] == 2 & inrange(emp[2], 4, 5))

* Marital status: overwrite ms from pfamst_n where that is available.
* ms code 2 is folded into 1 (author's note: no "married but separated"
* category in 2020).
tab ms
replace ms = 1 if pfamst_n == 1 | ms == 2
local from_codes 3 4 6 2 5
local to_codes   3 4 5 6 7
forvalues k = 1/5 {
	local src : word `k' of `from_codes'
	local dst : word `k' of `to_codes'
	replace ms = `dst' if pfamst_n == `src'
}
replace ms = . if ms < 0
tab ms year

* Marital shock, constant within person: first-wave ms is 1 and the
* second-wave ms is 3-5, or first-wave ms is 6 and second-wave ms is 7.
* NOTE(review): the meaning of these ms codes is not visible here;
* confirm against the codebook.
bys pid (year): gen msshock = (ms[1] == 1 & inrange(ms[2], 3, 5)) | (ms[1] == 6 & ms[2] == 7)

* passaway flags rows where any of the four pfs items equals 1, and is then
* switched on for all rows of a person whose second-wave row is flagged.
gen passaway = inlist(1, pfs081, pfs091, pfs121, pfs131)
bys pid (year): replace passaway = 1 if passaway[2] == 1

di "childbirth"
* Copies the person's lowest-sorting childbirth value to all rows.
* NOTE(review): if childbirth is coded 0/1 in both waves this keeps the 0.
bys pid (childbirth): replace childbirth = childbirth[1]

di "first job"
* First job: studying equals 1 in the first wave and emp is 1 or 2 in the
* second wave.
bys pid (year): gen firstjob = studying[1] == 1 & inlist(emp[2], 1, 2)

sum empshock msshock passaway
tabstat empshock msshock passaway, stat(n mean sd) format(%9.3f)



		summarize lginc
		* Copy the person's lowest-sorting lginc value to all of their rows.
		bys pid (lginc): replace lginc = lginc[1]

		di "PERSON INCOME"
		* Wave-specific gross income measures; negative codes become missing.
		* Author's notes: pglabgro is only available for 2017, pbrut only for 2020.
		gen person_labgro_income = cond(pglabgro >= 0, pglabgro, .)
		gen grossearningslastmonth = cond(pbrut >= 0, pbrut, .)

		* Spread each measure across all rows of the same person.
		foreach v of varlist grossearningslastmonth person_labgro_income {
			bys pid (`v'): replace `v' = `v'[1]
		}
		bys year: sum person_labgro_income grossearningslastmonth

		* One monthly income variable: the 2017 measure on 2017 rows, the 2020
		* measure on 2020 rows, missing for any other year.
		capture drop monthlyincome
		gen monthlyincome = cond(year == 2017, person_labgro_income, cond(year == 2020, grossearningslastmonth, .))

		bys year: sum monthlyincome

* Checkpoint the prepared long file. The path uses the SOEP global, as the
* reload on the next line already does, instead of a user-specific
* absolute path, so the script runs on any machine that sets the global.
save  "${SOEP}OUTDATA/attrition_V1",replace
use  "${SOEP}OUTDATA/attrition_V1",clear

tab year

* Person-level participation flags: 1 on every row of a person who has a
* row for that year, 0 otherwise.
gen sample2017=1 if year==2017
gen sample2020=1 if year==2020
bys pid (sample2017): replace sample2017=sample2017[1]
bys pid (sample2020): replace sample2020=sample2020[1]

replace sample2017=0 if sample2017==.
replace sample2020=0 if sample2020==.

* Income in thousands, matching the variable label. The previous version
* multiplied by 1000, which contradicted the label and shrank the probit
* coefficient by a factor of a million relative to the labelled unit.
gen monthlyincome000=monthlyincome/1000
la var monthlyincome000 "Personal income in 1000 euros"

* Model on all rows (no year restriction), raw income scale.
xi: probit sample2020 yrs_edu monthlyincome i.male Age 1.ms 3.ms 4.ms 5.ms i.emp
			outreg2 using "../OUTPUT/attrition_V1.xls", replace ctitle("Prob(Stay)") label  nocons nonotes

* Probability of being observed in 2020, estimated on 2017 rows, with
* income ...
xi: probit sample2020 yrs_edu monthlyincome000 male Age 1.ms 3.ms 4.ms 5.ms 1.emp 2.emp 3.emp 5.emp if year==2017
			outreg2 using "../OUTPUT/attrition_V3.xls", replace ctitle("Prob(Stay=1)") label  nocons nonotes
* ... and without income. This column is appended: a second "replace" on
* the same file discarded the model written just above.
xi: probit sample2020 Age male yrs_edu  1.ms 3.ms 4.ms 5.ms 1.emp 2.emp 3.emp 5.emp if year==2017
			outreg2 using "../OUTPUT/attrition_V3.xls", append ctitle("Prob(Stay=1)") label  nocons nonotes

			
di "****************************************************"

di "***SC BTW STAYED AND DROPPED OUT IN 2017***"
* attrited is defined on 2017 rows only: 1 when the person has a 2017 row
* but no 2020 row, 0 otherwise.
gen attrited=0 if year==2017
replace attrited=1 if year==2017 & sample2017==1 & sample2020!=1

* Mean and SD of SC for stayers and leavers, plus an unequal-variance
* t test of the difference.
eststo Stayed: quietly estpost summarize ///
    SC if attrited == 0
eststo Left: quietly estpost summarize ///
    SC if attrited == 1
eststo Difference:  estpost ttest ///
    SC, by(attrited) unequal

* "replace" lets the script be rerun; without it esttab stops as soon as
* the output file exists.
esttab Stayed Left Difference using "${OUTPUT}SC_attrition.csv", ///
cells("mean(pattern(1 1 0) fmt(2)) sd(pattern(1 1 0)) b(star pattern(0 0 1) fmt(2)) t(pattern(0 0 1) par fmt(2))") ///
label replace
di "*****IPW*********"

tab year

* sample2017, sample2020 and monthlyincome000 are already in memory from
* the attrition section earlier in this script. Generating them a second
* time stopped the run with "variable already defined" (r(110)), so they
* are not recreated here.

* Checkpoint that includes the participation flags.
save "${SOEP}OUTDATA/attrition_V2",replace

* Probability of still being observed in 2020, estimated on 2017 rows.
xi: probit sample2020 Age male yrs_edu  1.ms 3.ms 4.ms 5.ms 1.emp 2.emp 3.emp 5.emp if year==2017
capture drop prob_stay
predict prob_stay ,pr

di "2017==prob_stay method"
	* Keep the prediction made on the person's 2017 row and copy it to all
	* of their rows (non-missing sorts first).
	capture drop prob
	gen prob=prob_stay if year==2017
	bys pid (prob): replace prob=prob[1]

	bys year:sum prob

	* Inverse-probability weight; missing where no prediction is available.
	capture drop wt
	gen wt=1/prob

	* Weight file keyed by pid and year. "replace" allows reruns; without it
	* save stops once the file exists.
	keep pid year wt
	save "${SOEP}ipw_wt_V2.dta", replace
	
	
di "****************************************************"
di "*****Table 1 Life Events **ADD IPW and MALE DUMMY****"
di "****************************************************"
* Load the long-form analysis file and attach the IPW weights by pid-year.
use "${SOEP}OUTDATA/inno2017_2020_ppfad_V3",clear /*Long form from V3*/
merge 1:1 pid year using "${SOEP}ipw_wt_V2.dta", generate(_ipw)
sum SDdifSC male Age yrs_edu monthlyincome

sum SDdifSC empshock msshock passaway i.agegroup yrs_edu lginc

* Life events examined one at a time. For each event five weighted
* regressions of dif_SC are written as columns of one table, adding
* controls step by step: none, age group, education, income, male.
global le empshock msshock passaway childbirth sincepandemic
		local name Table1IPW_V2
		* The first column overwrites the file; every later column appends.
		local replace replace
	foreach v in  $le {
		reg dif_SC `v' [pw=wt]
			outreg2 using "`name'.xls", `replace' ctitle("Dif_SC") label addtext(Controls, No)  nocons nonotes  alpha(0.001, 0.01, 0.05) /*V4 changed level of signifcance*/

	local replace append
		reg dif_SC `v' i.agegroup [pw=wt]
			outreg2 using "`name'.xls", `replace' ctitle("Dif_SC") label addtext(Age, Yes) nocons nonotes alpha(0.001, 0.01, 0.05)
		reg dif_SC `v' i.agegroup  yrs_edu  [pw=wt]
			outreg2 using "`name'.xls", `replace' ctitle("Dif_SC") label addtext(Edu,Yes,Age, Yes) nocons nonotes alpha(0.001, 0.01, 0.05)
		reg dif_SC `v' i.agegroup  yrs_edu   lginc [pw=wt]
			outreg2 using "`name'.xls", `replace' ctitle("Dif_SC") label addtext(Edu,Yes, Age, Yes, Inc, Yes) nocons nonotes alpha(0.001, 0.01, 0.05)
		* The male dummy is reported in the footer as well; before, this
		* column carried the same footer text as the previous one.
		reg dif_SC `v' i.agegroup  yrs_edu   lginc male [pw=wt]
			outreg2 using "`name'.xls", `replace' ctitle("Dif_SC") label addtext(Edu,Yes, Age, Yes, Inc, Yes, Male, Yes) nocons nonotes alpha(0.001, 0.01, 0.05)
		}
		
	
di "****************************************************"
di "*****Table 2 SEM*********ADD IPW and MALE DUMMY***********************"
di "****************************************************"

* Load the SEM long file and attach the IPW weights by pid-year.
use "${SOEP}OUTDATA/inno2017_2020_long_SEM_V1", clear
merge 1:1 pid year using "${SOEP}ipw_wt_V2.dta", generate(_ipw)

di "****SEM w controls*************"
* Control sets. x is defined but not used by the models in this section.
global x i.agegroup  i.edu i.ms lginc
global x1 i.agegroup lginc unemp pcs male
global x2 i.agegroup lginc i.edu male
global x3 i.agegroup i.edu unemp male

* Outcomes and the control set paired with each one, in column order.
local outcomes ls pcs mcs monthlyincome
local ctrlsets x1 x2 x2 x3

local name Table2IPW_V2
* The first column overwrites the workbook; all later columns append.
local replace replace

* Weighted SEM: a latent SELFCONTROL (variance fixed at 1) measured by the
* selfcontrol items, entering each outcome equation next to the controls.
forvalues j = 1/4 {
	local y : word `j' of `outcomes'
	local cset : word `j' of `ctrlsets'
	xi: sem (selfcontrol1* selfcontrol201 selfcontrol202 <- SELFCONTROL) (`y' <- SELFCONTROL ${`cset'}) [pw=wt], variance(SELFCONTROL@1)
	outreg2 using "`name'.xls", `replace' label nocons nonotes alpha(0.001, 0.01, 0.05)
	local replace append
}

di "****OLS w controls*************"
* Weighted OLS counterparts with SdSC as the regressor of interest,
* appended to the same workbook.
forvalues j = 1/4 {
	local y : word `j' of `outcomes'
	local cset : word `j' of `ctrlsets'
	reg `y' SdSC ${`cset'} [pw=wt]
	outreg2 using "`name'.xls", `replace' label nocons nonotes alpha(0.001, 0.01, 0.05)
	local replace append
}

di "*****TABLE A2 Attrition****************************"
* Load the checkpoint that contains sample2020. attrition_V1 is written
* before the participation flags are generated, so the probit below could
* not find sample2020 in it; attrition_V2 is saved after they exist.
use  "${SOEP}OUTDATA/attrition_V2",clear

di "***ADDING SC*****"
* Attrition probit on 2017 rows with SC added to the covariates.
xi: probit sample2020 SC Age male yrs_edu  1.ms 3.ms 4.ms 5.ms 1.emp 2.emp 3.emp 5.emp if year==2017
			outreg2 using "${SOEP}OUTPUT/attrition_V4.xls", replace ctitle("Prob(Stay=1)") label  nocons nonotes alpha(0.001, 0.01, 0.05)
	

di "******TABLE A3_SPEARMAN CORR BY DECILE AGE******"

* Spearman correlation between the 2017 and 2020 SC scores within ten-year
* age bands: band 1 is 17-26, band 2 is 27-36, ... band 8 is 87-96.
* NOTE(review): the data in memory are in long form, so a person can
* contribute a row per wave with a different Age; consider restricting to
* one wave if each person should be counted once.
local band 0
forvalues lo = 17(10)87 {
	local ++band
	local hi = `lo' + 9
	spearman SC2017 SC2020 if inrange(Age, `lo', `hi')
	local sp`band' = r(rho)
}

* Band 7 is reported for the pooled 77-96 range. The earlier version
* computed 77-86 first and then overwrote it with this pooled value, and
* ran the 87-96 band twice; the reported numbers are unchanged.
spearman SC2017 SC2020 if inrange(Age,77,96)
local sp7= r(rho)

forvalues b = 1/8 {
	di `sp`b''
}

di "*****TABLE A4********************************"	
di "rank stability by age, using overall ranks"
use "${SOEP}OUTDATA/inno2017_2020_ppfad_V3",clear /*Long form from V3*/

	* Rank of SC within each survey year.
	bys year: egen rank_SC=rank(SC)
	* Wave-specific rank copied to every row of the person. The rank is
	* selected by year rather than by row position, so a person observed in
	* only one wave gets a missing value for the other wave.
	bys pid: egen rank_SC2017=max(cond(year==2017,rank_SC,.))
	bys pid: egen rank_SC2020=max(cond(year==2020,rank_SC,.))
	sort pid year
	su rank_SC2020 rank_SC2017
	gen dif_rank_SC=rank_SC2020-rank_SC2017

* From here on relative output paths resolve inside the OUTPUT folder.
cd "${SOEP}OUTPUT/"
sum   dif_rank_SC /*mean=0    sd=290.0694 */
reg dif_rank_SC i.agegroup
	* Explicit "replace": the local macro used here before still held
	* "append" from the Table 2 section, so reruns kept adding columns.
	outreg2 using "dif_rank_SC.xls", replace label nocons nonotes alpha(0.001, 0.01, 0.05)

* One dummy per age group present in the data. The wildcard avoids a
* hard-coded list of 12 names, which fails when a group is empty.
 tab agegroup,gen(AgeGroup)
	reg dif_rank_SC AgeGroup*

	* Joint test of the age-group dummies; testparm accepts the wildcard
	* and skips the dummy omitted for collinearity with the constant.
	testparm AgeGroup*
        